%%configure
{
  "name": "Spark-to-Cosmos_DB_Change_Feed_Connector",
  "executorMemory": "8G",
  "executorCores": 2,
  "numExecutors": 2,
  "driverCores": 2,
  "jars": [
    "wasb:///example/jars/azure-cosmosdb-spark_2.4.0_2.11-1.3.4-uber.jar",
    "wasb:///example/jars/azure-documentdb-1.13.0.jar",
    "wasb:///example/jars/azure-documentdb-rx-0.9.0-rc2.jar",
    "wasb:///example/jars/json-20140107.jar",
    "wasb:///example/jars/rxjava-1.3.0.jar",
    "wasb:///example/jars/rxnetty-0.4.20.jar"
  ],
  "conf": {
    "spark.jars.excludes": "org.scala-lang:scala-reflect"
  }
}
# Cosmos DB database and collection this notebook reads from.
database, collection = "dc1_country_db", "states"

# Options handed to the Cosmos DB Spark data source. The custom query limits
# the read to the state_code and state_name fields of each document.
# NOTE(review): the account master key is hard-coded in this notebook. It should
# be rotated and loaded from a secret store instead of being committed here.
statesConfig = dict(
    Endpoint="https://dc1-cosmos-db.documents.azure.com:443/",
    Masterkey="lo1eKao8FkZ9nFf6aQ4lxuG2nuRvCaGRgEwmKIk5j4rJDQbR05VMCPdqtm0A9iQXOXKhXKEQFbmfl20BtSEZWw==",
    Database=database,
    Collection=collection,
    preferredRegions="West US",
    SamplingRatio="1.0",
    schema_samplesize="200000",
    query_custom="SELECT c.state_code, c.state_name FROM c"
)

# Build the DataFrame through the connector's data source using the options above.
states = (
    spark.read
    .format("com.microsoft.azure.cosmosdb.spark")
    .options(**statesConfig)
    .load()
)

# Expose the DataFrame to the %%sql cells under the view name "states",
# then mark it for caching so repeated queries can reuse the loaded data.
states.createOrReplaceTempView("states")
states.cache()
%%sql
-- Number of rows in the "states" temp view.
SELECT count(1)
FROM states
%%sql
-- Every row and column of the "states" temp view.
SELECT *
FROM states